import pandas as pd
import numpy as np
import statsmodels.api as sm
import sqlite3
import pickle
import matplotlib.pyplot as plt

# Read from database
DB = sqlite3.connect('Database/analysisdata.db')
dt = pd.read_sql_query("SELECT * from Data1969_09", DB)
dt = dt.set_index('usid')
DB.close()

dt = dt[['mod1a_num', 'mod1b_num', 'mod1c_num', 'mod1d_num', 'mod1e_num',
       'mod1f_num', 'mod1g_num', 'mod1h_num', 'mod1i_num', 'mod1j_num',
       'mod1k_num', 'mod1l_num', 'mod1m_num', 'mod1n_num', 'mod1o_num',
       'mod1p_num', 'mod1q_num', 'mod1r_num', 'mod1s_num', 'my4', '2a', '2b',
       '2c', '2d', '2e', '2f', '3a', '3b', '3c', 'date', 'fr_strikes_mean',
       'educ_c', 'sec_c', 'econ_c', 'admin_c', 'health_c', 'soccap_c']]

# The mean of regional safety, economy, and civic society outcome. Notice that the mean of the safety outcome is
# calculated by 1 - dt['sec_c'].mean() because in the original data, large 'sec_c' means less safety. So I use
# 1 - dt['sec-c'] to make it consistent to the naive understanding that larger safety outcome means safer.

1 - dt['sec_c'].mean()
dt['econ_c'].mean()
dt['soccap_c'].mean()

dt['fr_strikes_mean'].sum()

fig, ax = plt.subplots(figsize=(16,9))
fontsize = 25
Allnames = np.array(["Enemy Military Presence"
                           , "Enemy Military Activity"
                           , "Impact of Military Activity"
                           , "Friendly Military Presence"
                           , "Friendly Military Activity"
                           , "Law Enforcement"
                           , "PSDF Activity"
                           , "Enemy Political Presence"
                           , "Enemy Political Activity"
                           , "Administration"
                           , "RD Cadre"
                           , "Information PSYOPS"
                           , "Political Mobilization"
                           , "Public Health"
                           , "Education"
                           , "Social Welfare"
                           , "Development Assistance"
                           , "Economic Activity"
                           , "Land Tenure"])
ax.barh(Allnames, dt.mean(axis=0)[:19], height=.5)
ax.set_xlabel('Mean of the submodel scores in HES', fontsize=fontsize)
ax.set_ylabel('Submodel names', fontsize=fontsize)
ax.set_xlim([0,5])
ax.tick_params(axis='both', which='major', labelsize=fontsize)
fig.tight_layout()
fig.show()
fig.savefig('Visualization/DescriptiveAnalysis/mean_submodel_scores.pdf')